* ==============================================================================
* Date: Dec 13, 2024
* Project: Health and anti-immigration attitudes
*
* Dataset: hlthmigr.dta
* 
*
* This file contains the code to analyse the clustering of the data, and to 
* produce regression tables and figures.
* ==============================================================================

break

					* Prepare for analyses
					* ====================
**# Bookmark #1


*** Install packages

* ssc install coefplot, replace
* ssc install cleanplots, replace
* ssc install full_palette, replace
* ssc install std_beta, replace
* ssc install schemepack, replace

*** Define directory			
clear all
set more off

* Insert your working directory here:
global PATH "..."
cd $PATH

use data_hlthmigr/hlthmigr.dta, clear
set scheme white_tableau


*** Define global variables for analyses			
** Demography
global Demogr	agea i.gndr2 i.mrtsts4 i.domicil3 i.brncntr2
** Socioeconomic factors 
global SES		eduyrs i.oesch5 hinctntb i.hincfel2 i.uempla i.hincsrc2a
** Political-cultural factors
global Cult		lrscale i.nwspol2 i.clsprty2 rlgdgr impsafe ipstrgv imptrad
** Clustering
global Clstr 	i.cntryc i.essround

					
					
					* Non-linear prediction plotted
					* =============================

					
* set minimal model sample size
qui reg migrw100 health $Demogr $SES $Cult $Clstr [aw=anweight]
cap drop in_pols
gen in_pols = e(sample)

* Estimate adjusted margins for health
qui reg migrw100 i.health $Demogr $SES $Cult $Clstr if in_pols [aw=anweight], cluster(cntryround)
margins health, atmeans post

* Save the results directly from the margins table
matrix M = r(table) // Save the margins results matrix
clear // Start with a fresh dataset
set obs `=colsof(M)' // Create rows for each hlth5 category
gen health = _n // Generate hlth5 categories
gen predicted = M[1, _n] // Margins (predicted values)
gen ci_lower = M[5, _n] // Lower bound of confidence interval
gen ci_upper = M[6, _n] // Upper bound of confidence interval
save output_hlthmigr/margins_data.dta, replace

* Calculate the frequency distribution
use data_hlthmigr/hlthmigr.dta, clear
egen freq = count(health), by(health)
gen health_freq = 100 * freq / _N // Normalize to percentages

* Reduce the dataset to one row per health category
keep health health_freq
duplicates drop health, force // Ensure only one observation per health category
save output_hlthmigr/frequency_data.dta, replace

* Merge margins data with frequency data
use output_hlthmigr/margins_data.dta, clear
merge 1:1 health using output_hlthmigr/frequency_data.dta
drop if _merge==2

lab var health "Self-rated health"
cap lab drop health
lab def health 1 "Very good" 2 "Good" 3 "Fair" 4 "Bad" 5 "Very bad"
lab val health health
save output_hlthmigr/marginsfre_data.dta, replace

* Overlay the frequency bar graph and marginsplot with dual Y-axes
twoway ///
    (bar health_freq health, barwidth(0.4) bcolor(gs12) yaxis(2)) ///
    (rcap ci_lower ci_upper health, color(navy%90) yaxis(1)) ///
    (scatter predicted health, msymbol(circle) mcolor(cranberry%90) msize(medium) yaxis(1)) ///
    (line predicted health, lcolor(navy%90) lwidth(medium) yaxis(1)), ///
    ytitle("Worried about immigration (predicted score)", axis(1)) ///
    ytitle("Distribution of respondents by health (%) ", axis(2)) ///
    xlabel(, angle(45) valuelabel labsize(*0.9)) ///
    ylabel(40(3)55, axis(1)) ///
    ylabel(0(10)50, axis(2)) ///
    legend(order(1 "Frequency Distribution" 2 "Confidence Interval" 3 "Predicted Means" 4 "Line of Fit") /// 
           cols(1) pos(2) ring(0) size(vsmall) region(style(none) margin(r+10)))
		   
graph export output_hlthmigr/healthmigrcombo.emf, as(emf) replace



					* Main OLS results
					* ================

clear all
use data_hlthmigr/hlthmigr.dta, clear

** Not standardized
* set minimal model sample size
qui reg migrw100 health $Demogr $SES $Cult $Clstr [aw=anweight]
cap drop in_pols
gen in_pols = e(sample)

* model1: self-rated health
reg migrw100 health $Clstr if in_pols [aw=anweight], cluster(cntryround)
est sto pols1

* model2: self-rated health + demography
reg migrw100 health $Demogr $Clstr if in_pols [aw=anweight], cluster(cntryround)
est sto pols2

* model3: self-rated health + demography + socioeconomic factors
reg migrw100 health $Demogr $SES $Clstr if in_pols [aw=anweight], cluster(cntryround)
est sto pols3

* model4: self-rated health + demography + political-cultural factors 
reg migrw100 health $Demogr $Cult $Clstr if in_pols [aw=anweight], cluster(cntryround)
est sto pols4

* model5: fully adjusted
reg migrw100 health $Demogr $SES $Cult $Clstr if in_pols [aw=anweight], cluster(cntryround)
est sto pols5


* Reduced regressiont table, main text
#delimit ;
estout 	pols*
		using output_hlthmigr/pols.txt
		, replace cells(b(star fmt("%9.3fc")) se(par fmt("%9.3fc"))) stats(r2_a N, fmt(%9.3fc %9.0fc) 
		labels("R-Square" "No. of observations" )) label varlabels(_cons Constant) 
		legend nobaselevels noomitted mlabels(none) numbers
		mgroups("Worried about immigration", pattern(1 0 0 0 0))
		indicate("ESS-round (year) fixed effects = *.essround" "Country fixed effects = *.cntryc"
		"Demographic factors = agea *.gndr2 *.mrtsts4 *.domicil3 *.brncntr2"
		"Socioeconomic factors = eduyrs *.oesch5 hinctntb *.hincfel2 *.uempla *.hincsrc2a"
		"Political-cultural factors = lrscale *.nwspol2 *.clsprty2 rlgdgr impsafe ipstrgv imptrad")
		;
#delimit cr


* Full regression table, appendix
#delimit ;
estout 	pols*
		using output_hlthmigr/polsfull.txt
		, replace cells(b(star fmt("%9.3fc")) se(par fmt("%9.3fc"))) stats(r2_a N, fmt(%9.3fc %9.0fc) 
		labels("R-Square" "No. of observations" )) label varlabels(_cons Constant) 
		legend nobaselevels noomitted mlabels(none) numbers
		mgroups("Worried about immigration", pattern(1 0 0 0 0))
		indicate("ESS-round (year) fixed effects = *.essround" "Country fixed effects = *.cntryc")
		refcat(agea "Demographic factors" eduyrs "Socioeconomic factors" 
		lrscale "Political-cultural factors" 
		2.oesch5 "Social class (ref. cat.: Higher-grade service)"
		2.mrtsts4 "Marital status (ref. cat.: Married or partnered)"
		2.domicil3 "Domicil (ref. cat.: Big city or suburb)", nolabel)
		;
#delimit cr

** Standardized coefficients

reg z_migrw100 z_health z_agea z_gndr2 z_mrtsts4tab2 z_mrtsts4tab3 z_mrtsts4tab4 ///
	z_domicil3tab2 z_domicil3tab3 z_brncntr2 ///
	z_eduyrs z_oesch5tab2-z_oesch5tab5 z_hinctntb z_hincfel2 z_uempla z_hincsrc2a ///
	z_lrscale z_nwspol2 z_clsprty2 z_rlgdgr z_impsafe z_ipstrgv z_imptrad ///
	$Clstr if in_pols [aw=anweight], cluster(cntryround)
est sto stand
	
coefplot 	stand, drop(*.cntryc *.essround _cons) ///
			xline(0, lwidth(vthin) lcolor(cranberry%90)) ///
			xlabel(-0.2(0.05)0.2)  xsize(3.3) ysize(2.3) ///
			xtitle("Standardized regression coefficient" ///
			, size(small))

graph export "output_hlthmigr\pols-std.emf", as(emf) replace

* regression table
#delimit ;
estout 	pols5 stand
		using output_hlthmigr/pols-std.txt
		, replace cells(b(star fmt("%9.3fc")) se(par fmt("%9.3fc"))) stats(r2_a N, fmt(%9.3fc %9.0fc) 
		labels("R-Square" "No. of observations" )) label varlabels(_cons Constant) 
		legend nobaselevels noomitted mlabels(none) numbers
		mgroups("Worried about immigration", pattern(1 0 0 0 0))
		indicate("ESS-round (year) fixed effects = *.essround" "Country fixed effects = *.cntryc")
		refcat(agea "Demographic factors" eduyrs "Socioeconomic factors" 
		lrscale "Political-cultural factors" 1.hlth2a "Self-rated health (ref. cat.: Good)" 
		2.oesch5 "Social class (ref. cat.: Higher-grade service)"
		2.mrtsts4 "Marital status (ref. cat.: Married or partnered)"
		2.domicil3 "Domicil (ref. cat.: Big city or suburb)", nolabel)
		;
#delimit cr


					* Heterogeneity
					* =============

**# Bookmark #3

*** Racial minorities in living area // rounds 1/7

reg migrw100 c.health##acetalv2 $Demogr $SES $Cult $Clstr, cluster(cntryround)
est sto area
margins, at(health=(1(1)5) acetalv2=(0 1))

#delimit 		;
marginsplot, 	plot1opts(lcolor(navy%90) mcolor(navy%90)) ci1opts(lcolor(navy%90))
				plot2opts(lcolor(cranberry%90) mcolor(cranberry%90)) ci2opts(lcolor(cranberry%90))
				title("") ytitle("Worried about immigration") xlabel(,angle(45) labsize(*0.9)) 
				xtitle("Self-rated health") 
				xsc(titlegap(*-2))
				legend(cols(1) rows(5) pos(10) ring(0) size(small)
				bmargin(zero) rowgap(0) colgap(0) region(fcolor(none)) 
				title("Selected values of the moderator" 
				"(Perceived prevalence of racial/ethnic minorities):", 
				j(left) size(medsmall)))
				;
#delimit cr

graph export output_hlthmigr/area.emf, as(emf) replace

* Regression table
#delimit ;
estout 	area
		using output_hlthmigr/area.txt
		, replace cells(b(star fmt("%9.3fc")) se(par fmt("%9.3fc"))) 
		stats(r2_a N, fmt(%9.3fc %9.0fc) 
		labels("R-Square" "No. of observations" )) label varlabels(_cons Constant) 
		legend nobaselevels noomitted mlabels(none) numbers
		mgroups("Worried about immigration", pattern(1))
		indicate("ESS-round (year) fixed effects = *.essround" "Country fixed effects = *.cntryc"
		"Demographic factors = agea *.gndr2 *.mrtsts4 *.domicil3 *.brncntr2"
		"Socioeconomic factors = eduyrs *.oesch5 hinctntb *.hincfel2 *.uempla *.hincsrc2a"
		"Political-cultural factors = *.nwspol2 *.clsprty2 lrscale rlgdgr impsafe ipstrgv imptrad")
		refcat(1.acetalv2 "Perceived prevalence of racial/ethnic minorities (ref. cat.: Some)")
		starlevels(° 0.10 * 0.05 ** 0.01 *** 0.001) 
		;
#delimit cr

					
					* Mediation analysis
					* ==================
**# Bookmark #4

/* Four notes on these SEMSs:
	- We removed the ESS-round fixed effects which would make the models 
		close to impossible to estimate (testing such models after an hour still
		no sign of any convergence). The standardized coefficient in the unmediated 
		model is very close to the standardized OLS coefficient estimate, so this
		reassures us that removing the fixed effects does not introduce major bias.
		Structural equation models with so many dummies can be somewhat unstable,
		depending on your version of Stata and the exact parameters included, 
		you might have to experiment with the maximum set of country dummies 
		("cntryc_3-cntryc_27") to figure out what works.
	- There are two ways to calculate the latent variable.
		First, we can set the factor loading of one of the observed variables to 1. 
		Second, we can set the variance of the latent variable to 1. 
		This is the case when we use fully standardized model by specifying 
		the option standardized. This allows each of the (standardized) 
		factor loadings (i.e., correlations) to be calculated.
	- The default way of constructing the latent variable is listwise deletion.
		This is different from how alpha calculates a common index, which are
		used in the OLS models. Therefore, the sample sizes in the OLS and SEM
		models differ even though we are using the same set of variables. 
		This can be avoided by changing the estimation method to mlmv, which does
		not drop missing values. Combined with using in_pols as the estimation
		sample, we have exactly the same number of observations as in the OLS.
	- Regression table: there is no accepted way to present the regression table with
		SEM results, some fine-tuning of the estout table is needed.
*/

**# Bookmark #2

* Define global variable for controls 29 countries
#delimit	;
global 		Med1 agea gndr2 mrtsts4tab2-mrtsts4tab4 domicil3tab2 domicil3tab3 brncntr2
			eduyrs oesch5tab2-oesch5tab5 hinctntb hincfel2 uempla hincsrc2a
			lrscale nwspol2 clsprty2 rlgdgr impsafe ipstrgv imptrad
			cntryc_3-cntryc_27
			;
#delimit cr

** Main mediation model
qui reg idno imbgecor imuecltr imwbcntr health trstpltr trstprlr trstlglr imblecor stfhlthr $Med1 i.cntryc [pw=anweight], vce(cluster cntryround)
cap drop in_sem
gen in_sem = e(sample)

#delimit	;
sem			(Trstr -> trstpltr trstprlr trstlglr)
			(Migrw -> imbgecor imuecltr imwbcntr)
			(imblecor <- health $Med1)
			(ppltrstr <- health $Med1)
			(Trstr <- health $Med1)
			(Migrw <- health imblecor Trstr ppltrstr $Med1)
			if in_sem [pw=anweight]
			, standardized vce(cluster cntryround)
			;
#delimit cr
est sto sem1
estat eqgof


* Standardized mediation path estimates (Table 3)
medsem, indep(health) med(imblecor) dep(Migrw) stand rit
medsem, indep(health) med(ppltrstr) dep(Migrw) stand rit
medsem, indep(health) med(Trstr) dep(Migrw) stand rit


* Full regression table, appendix
#delimit ;
estout 	sem1
		using output_hlthmigr/sem1.txt
		, replace cells((b_std(star fmt(%9.3fc)) se(fmt(%9.3fc)) z(fmt(%9.3fc)) p(fmt(%9.3fc)))) 
		stats(N, fmt(%9.0fc) labels("No. of observations" )) label 
		collabels("Standardized coefficient" "S.E." "z" "p")
		eqlabels("Dep. var.: Immigrants drain public services" 
		"Dep. var.: Diminished interpersonal trust"
		"Dep. var.: Distrust in political institutions"
		"Dep. var.: Worried about immigration")
		varlabels(
		_cons Constant  
		Migrw "Worried about immigration" 
		Trstr "Distrust in political institutions"
		var(e.ppltrstr)  "var(e.Interpersonal trust)"
		var(e.trstpltr) "var(e.Trust in politicians)"
		var(e.trstprlr) "var(e.Trust in parliament)"
		var(e.trstlglr) "var(e.Trust in legal syststem)"
		var(e.imbgecor) "var(e.Immigration is bad for the economy)"
		var(e.imuecltr) "var(e.Immigration is bad for culture)"
		var(e.imwbcntr) "var(e.Immigration makes country worse)"
		var(e.imblecor) "var(e.Immigrants drain public services)"
		var(e.Trstr) "var(e.Distrust in political institutions)"
		var(e.Migrw) "var(e.Worried about immigration)"
				) 
		legend mlabels(none) numbers
		refcat(agea "Demographic factors" eduyrs "Socioeconomic factors" 
		lrscale "Political-cultural factors"
		oesch5tab2 "Social class (ref. cat.: Higher-grade service)"
		mrtsts4tab2 "Marital status (ref. cat.: Married or partnered)"
		domicil3tab2 "Domicile (ref. cat.: Big city or suburb)"
		var(e.trstpltr) "Variance components", nolabel)
		drop(cntryc_*)
		;
#delimit cr


** Mediation with fear of losing access to health and dissatisfaction with health services in ESS4 

* Define global variable (round 4, 21 countries)
#delimit	;
global 		Med2 agea gndr2 mrtsts4tab2-mrtsts4tab4 domicil3tab2 domicil3tab3 brncntr2
			eduyrs oesch5tab2-oesch5tab5 hinctntb hincfel2 uempla hincsrc2a
			lrscale nwspol2 clsprty2 rlgdgr impsafe ipstrgv imptrad
			cntryc_2 cntryc_4 cntryc_6 cntryc_7 cntryc_8 cntryc_10 
			cntryc_11 cntryc_12 cntryc_13 cntryc_14 cntryc_15 cntryc_16 cntryc_17 
			cntryc_22 cntryc_23 cntryc_24 cntryc_25 cntryc_26 cntryc_27 cntryc_28
			;
#delimit cr

qui reg idno lknhlcn imbgecor imuecltr imwbcntr health ppltrstr trstpltr trstprlr trstlglr imblecor stfhlthr $Med2 i.cntryc [pw=anweight], vce(cluster cntry)
cap drop in_sem2
gen in_sem2 = e(sample)

#delimit	;
sem			(Trstr -> trstpltr trstprlr trstlglr)
			(Migrw -> imbgecor imuecltr imwbcntr)
			(stfhlthr <- health $Med2)
			(imblecor <- stfhlthr $Med2)
			(ppltrstr <- health $Med2)
			(Trstr <- health $Med2)
			(lknhlcn <- imblecor ppltrstr Trstr health $Med2)
			(Migrw <- lknhlcn imblecor ppltrstr Trstr $Med2)
			if in_sem2 [pw=anweight]
			, standardized vce(cluster cntry)
			;
#delimit cr 
est sto sem2
estat eqgof

* Full regression table, appendix
#delimit ;
estout 	sem2
		using output_hlthmigr/sem2.txt
		, replace cells((b_std(star fmt(%9.3fc)) se(fmt(%9.3fc)) z(fmt(%9.3fc)) p(fmt(%9.3fc)))) 
		stats(N, fmt(%9.0fc) labels("No. of observations" )) label 
		collabels("Standardized coefficient" "S.E." "z" "p")
		eqlabels(
		"Dep. var.: Dissatisfied with the health system"
		"Dep. var.: Immigrants drain public services" 
		"Dep. var.: Diminished interpersonal trust"
		"Dep. var.: Fear of losing access to public health services"
		"Dep. var.: Distrust in political institutions"
		"Dep. var.: Worried about immigration")		
		varlabels(
		_cons Constant 
		Trstr "Distrust in political institutions" 
		Migrw "Worried about immigration" 
		var(e.trstpltr) "var(e.Trust in politicians)"
		var(e.trstprlr) "var(e.Trust in parliament)"
		var(e.trstlglr) "var(e.Trust in legal syststem)"
		var(e.imbgecor) "var(e.Immigration is bad for the economy)"
		var(e.imuecltr) "var(e.Immigration is bad for culture)"
		var(e.imwbcntr) "var(e.Immigration makes country worse)"
		var(e.imblecor) "var(e.Immigrants drain public services)"
		var(e.stfhlthr) "var(e.Dissatisfaction with health services)"
		var(e.lknhlcn) "var(e.Fear of losing access to public health services)"
		var(e.Trstr) "var(e.Distrust in political institutions)"
		var(e.Migrw) "var(e.Worried about immigration)"
				) 
		legend mlabels(none) numbers
		refcat(agea "Demographic factors" eduyrs "Socioeconomic factors" 
		lrscale "Political-cultural factors" 
		oesch5tab2 "Social class (ref. cat.: Higher-grade service)"
		mrtsts4tab2 "Marital status (ref. cat.: Married or partnered)"
		domicil3tab2 "Domicile (ref. cat.: Big city or suburb)"
		var(e.trstpltr) "Variance components", nolabel)
		drop(cntryc_*)
		;
#delimit cr

** Mediation with restrict immigrants' social rights
* Define global variable (round 4, 21 countries)
#delimit	;
global 		Med2 agea gndr2 mrtsts4tab2-mrtsts4tab4 domicil3tab2 domicil3tab3 brncntr2
			eduyrs oesch5tab2-oesch5tab5 hinctntb hincfel2 uempla hincsrc2a
			lrscale nwspol2 clsprty2 rlgdgr impsafe ipstrgv imptrad
			cntryc_2 cntryc_4 cntryc_6 cntryc_7 cntryc_8 cntryc_10 
			cntryc_11 cntryc_12 cntryc_13 cntryc_14 cntryc_15 cntryc_16 cntryc_17 
			cntryc_22 cntryc_23 cntryc_24 cntryc_25 cntryc_26 cntryc_27 cntryc_28
			;
#delimit cr

#delimit	;
sem			(Trstr -> trstpltr trstprlr trstlglr)
			(stfhlthr <- health $Med2)
			(imblecor <- stfhlthr $Med2)
			(ppltrstr <- health $Med2)
			(Trstr <- health $Med2)
			(lknhlcn <- imblecor ppltrstr Trstr health $Med2)
			(imsclbn2 <- lknhlcn imblecor ppltrstr Trstr $Med2)
			if in_sem2 [pw=anweight]
			, standardized vce(cluster cntry)
			;
#delimit cr 

estat eqgof
est sto sem3

* Full regression table, appendix
#delimit ;
estout 	sem3
		using output_hlthmigr/sem3.txt
		, replace cells((b_std(star fmt(%9.3fc)) se(fmt(%9.3fc)) z(fmt(%9.3fc)) p(fmt(%9.3fc)))) 
		stats(N, fmt(%9.0fc) labels("No. of observations" )) label 
		collabels("Standardized coefficient" "S.E." "z" "p")
		eqlabels(
		"Dep. var.: Dissatisfied with the health system"
		"Dep. var.: Immigrants drain public services" 
		"Dep. var.: Diminished interpersonal trust"
		"Dep. var.: Fear of losing access to public health services"
		"Dep. var.: Restrict immigrants' social rights"
		"Dep. var.: Distrust in political institutions")		
		varlabels(
		_cons Constant 
		Trstr "Distrust in political institutions" 
		var(e.trstpltr) "var(e.Trust in politicians)"
		var(e.trstprlr) "var(e.Trust in parliament)"
		var(e.trstlglr) "var(e.Trust in legal syststem)"
		var(e.imblecor) "var(e.Immigrants drain public services)"
		var(e.stfhlthr) "var(e.Dissatisfaction with health services)"
		var(e.lknhlcn) "var(e.Fear of losing access to public health services)"
		var(e.Trstr) "var(e.Distrust in political institutions)"
		var(e.imsclbn2) "var(e.Restrict immigrants' social rights)"
				) 
		legend mlabels(none) numbers
		refcat(agea "Demographic factors" eduyrs "Socioeconomic factors" 
		lrscale "Political-cultural factors"
		oesch5tab2 "Social class (ref. cat.: Higher-grade service)"
		mrtsts4tab2 "Marital status (ref. cat.: Married or partnered)"
		domicil3tab2 "Domicile (ref. cat.: Big city or suburb)"
		var(e.trstpltr) "Variance components", nolabel)
		drop(cntryc_*)
		;
#delimit cr